#!/usr/bin/python
#
# top-like utility for displaying kvm statistics
#
# Copyright 2006-2008 Qumranet Technologies
# Copyright 2008-2011 Red Hat, Inc.
#
# Authors:
#  Avi Kivity <avi@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2.  See
# the COPYING file in the top-level directory.

import curses
import sys, os, time, optparse, ctypes
from ctypes import *

class DebugfsProvider(object):
    """Statistics provider backed by the counter files under debugfs.

    Every entry of ``self.base`` is treated as one counter whose file
    content parses as a single integer.
    """

    def __init__(self):
        self.base = '/sys/kernel/debug/kvm'
        # Until select() narrows it down, every directory entry is a field.
        self._fields = os.listdir(self.base)

    def fields(self):
        """Return the currently selected field names."""
        return self._fields

    def select(self, fields):
        """Restrict subsequent read() calls to the given field names."""
        self._fields = fields

    def read(self):
        """Return a dict mapping each selected field to its current value.

        Propagates the error raised by open() or int() when a counter file
        is missing or does not contain an integer.
        """
        def val(key):
            # Close each file right away: read() runs on every refresh, so
            # relying on garbage collection would pile up open descriptors.
            with open(self.base + '/' + key) as counter:
                return int(counter.read())
        return dict((key, val(key)) for key in self._fields)

# Exit-reason table selected when the 'vmx' CPU flag is found in
# /proc/cpuinfo (see x86_exit_reasons and detect_platform).  Maps the numeric
# exit reason to the NAME used in the per-reason 'kvm_exit(NAME)' fields.
# The numbering is sparse; presumably it mirrors the kernel's VMX exit
# reason list -- confirm against the kernel headers before extending it.
vmx_exit_reasons = {
    0: 'EXCEPTION_NMI',
    1: 'EXTERNAL_INTERRUPT',
    2: 'TRIPLE_FAULT',
    7: 'PENDING_INTERRUPT',
    8: 'NMI_WINDOW',
    9: 'TASK_SWITCH',
    10: 'CPUID',
    12: 'HLT',
    14: 'INVLPG',
    15: 'RDPMC',
    16: 'RDTSC',
    18: 'VMCALL',
    19: 'VMCLEAR',
    20: 'VMLAUNCH',
    21: 'VMPTRLD',
    22: 'VMPTRST',
    23: 'VMREAD',
    24: 'VMRESUME',
    25: 'VMWRITE',
    26: 'VMOFF',
    27: 'VMON',
    28: 'CR_ACCESS',
    29: 'DR_ACCESS',
    30: 'IO_INSTRUCTION',
    31: 'MSR_READ',
    32: 'MSR_WRITE',
    33: 'INVALID_STATE',
    36: 'MWAIT_INSTRUCTION',
    39: 'MONITOR_INSTRUCTION',
    40: 'PAUSE_INSTRUCTION',
    41: 'MCE_DURING_VMENTRY',
    43: 'TPR_BELOW_THRESHOLD',
    44: 'APIC_ACCESS',
    48: 'EPT_VIOLATION',
    49: 'EPT_MISCONFIG',
    54: 'WBINVD',
    55: 'XSETBV',
    56: 'APIC_WRITE',
    58: 'INVPCID',
}

# Exit-reason table selected when the 'svm' CPU flag is found in
# /proc/cpuinfo (see x86_exit_reasons and detect_platform).  Maps the numeric
# exit code to the NAME used in the per-reason 'kvm_exit(NAME)' fields.
# Presumably mirrors the kernel's SVM exit code list -- confirm against the
# kernel headers before extending it.
svm_exit_reasons = {
    0x000: 'READ_CR0',
    0x003: 'READ_CR3',
    0x004: 'READ_CR4',
    0x008: 'READ_CR8',
    0x010: 'WRITE_CR0',
    0x013: 'WRITE_CR3',
    0x014: 'WRITE_CR4',
    0x018: 'WRITE_CR8',
    0x020: 'READ_DR0',
    0x021: 'READ_DR1',
    0x022: 'READ_DR2',
    0x023: 'READ_DR3',
    0x024: 'READ_DR4',
    0x025: 'READ_DR5',
    0x026: 'READ_DR6',
    0x027: 'READ_DR7',
    0x030: 'WRITE_DR0',
    0x031: 'WRITE_DR1',
    0x032: 'WRITE_DR2',
    0x033: 'WRITE_DR3',
    0x034: 'WRITE_DR4',
    0x035: 'WRITE_DR5',
    0x036: 'WRITE_DR6',
    0x037: 'WRITE_DR7',
    0x040: 'EXCP_BASE',
    0x060: 'INTR',
    0x061: 'NMI',
    0x062: 'SMI',
    0x063: 'INIT',
    0x064: 'VINTR',
    0x065: 'CR0_SEL_WRITE',
    0x066: 'IDTR_READ',
    0x067: 'GDTR_READ',
    0x068: 'LDTR_READ',
    0x069: 'TR_READ',
    0x06a: 'IDTR_WRITE',
    0x06b: 'GDTR_WRITE',
    0x06c: 'LDTR_WRITE',
    0x06d: 'TR_WRITE',
    0x06e: 'RDTSC',
    0x06f: 'RDPMC',
    0x070: 'PUSHF',
    0x071: 'POPF',
    0x072: 'CPUID',
    0x073: 'RSM',
    0x074: 'IRET',
    0x075: 'SWINT',
    0x076: 'INVD',
    0x077: 'PAUSE',
    0x078: 'HLT',
    0x079: 'INVLPG',
    0x07a: 'INVLPGA',
    0x07b: 'IOIO',
    0x07c: 'MSR',
    0x07d: 'TASK_SWITCH',
    0x07e: 'FERR_FREEZE',
    0x07f: 'SHUTDOWN',
    0x080: 'VMRUN',
    0x081: 'VMMCALL',
    0x082: 'VMLOAD',
    0x083: 'VMSAVE',
    0x084: 'STGI',
    0x085: 'CLGI',
    0x086: 'SKINIT',
    0x087: 'RDTSCP',
    0x088: 'ICEBP',
    0x089: 'WBINVD',
    0x08a: 'MONITOR',
    0x08b: 'MWAIT',
    0x08c: 'MWAIT_COND',
    0x08d: 'XSETBV',
    0x400: 'NPF',
}

# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
# Installed as the global exit_reasons table by aarch64_init(); maps the
# numeric code to the NAME used in the per-reason 'kvm_exit(NAME)' fields.
aarch64_exit_reasons = {
    0x00: 'UNKNOWN',
    0x01: 'WFI',
    0x03: 'CP15_32',
    0x04: 'CP15_64',
    0x05: 'CP14_MR',
    0x06: 'CP14_LS',
    0x07: 'FP_ASIMD',
    0x08: 'CP10_ID',
    0x0C: 'CP14_64',
    0x0E: 'ILL_ISS',
    0x11: 'SVC32',
    0x12: 'HVC32',
    0x13: 'SMC32',
    0x15: 'SVC64',
    0x16: 'HVC64',
    0x17: 'SMC64',
    0x18: 'SYS64',
    0x20: 'IABT',
    0x21: 'IABT_HYP',
    0x22: 'PC_ALIGN',
    0x24: 'DABT',
    0x25: 'DABT_HYP',
    0x26: 'SP_ALIGN',
    0x28: 'FP_EXC32',
    0x2C: 'FP_EXC64',
    0x2F: 'SERROR',
    0x30: 'BREAKPT',
    0x31: 'BREAKPT_HYP',
    0x32: 'SOFTSTP',
    0x33: 'SOFTSTP_HYP',
    0x34: 'WATCHPT',
    0x35: 'WATCHPT_HYP',
    0x38: 'BKPT32',
    0x3A: 'VECTOR32',
    0x3C: 'BRK64',
}

# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Used on every platform: the inverted table becomes the value filter for
# the 'kvm_userspace_exit' tracepoint (matched against its 'reason' field),
# producing the 'kvm_userspace_exit(NAME)' fields.
userspace_exit_reasons = {
     0: 'UNKNOWN',
     1: 'EXCEPTION',
     2: 'IO',
     3: 'HYPERCALL',
     4: 'DEBUG',
     5: 'HLT',
     6: 'MMIO',
     7: 'IRQ_WINDOW_OPEN',
     8: 'SHUTDOWN',
     9: 'FAIL_ENTRY',
    10: 'INTR',
    11: 'SET_TPR',
    12: 'TPR_ACCESS',
    13: 'S390_SIEIC',
    14: 'S390_RESET',
    15: 'DCR',
    16: 'NMI',
    17: 'INTERNAL_ERROR',
    18: 'OSI',
    19: 'PAPR_HCALL',
    20: 'S390_UCONTROL',
    21: 'WATCHDOG',
    22: 'S390_TSCH',
    23: 'EPR',
    24: 'SYSTEM_EVENT',
}

# Keyed by the /proc/cpuinfo CPU flag that announces the virtualization
# extension; detect_platform() passes the first matching flag to x86_init().
x86_exit_reasons = dict(vmx=vmx_exit_reasons, svm=svm_exit_reasons)

# Per-architecture settings, filled in through globals() by the *_init()
# helpers that detect_platform() calls:
#   sc_perf_evt_open - syscall number handed to syscall() by
#                      _perf_event_open()
#   exit_reasons     - hardware exit-reason table; stays None on platforms
#                      whose init helper does not set it (s390, ppc) or when
#                      no platform is recognised
sc_perf_evt_open = None
exit_reasons = None

# Default request codes passed to fcntl.ioctl() on perf event descriptors by
# the Event class.  ppc_init() replaces the whole table on ppc hosts.
# Presumably the PERF_EVENT_IOC_* values from linux/perf_event.h -- confirm.
ioctl_numbers = dict(
    SET_FILTER=0x40082406,
    ENABLE=0x00002400,
    DISABLE=0x00002401,
    RESET=0x00002403,
)

def x86_init(flag):
    """Configure the module globals for an x86 host.

    flag is the CPU flag ('vmx' or 'svm') used to pick the exit-reason
    table out of x86_exit_reasons; an unknown flag raises KeyError before
    any global is modified.
    """
    reasons = x86_exit_reasons[flag]
    settings = globals()
    settings['sc_perf_evt_open'] = 298
    settings['exit_reasons'] = reasons

def s390_init():
    """Configure the module globals for an s390 host.

    Only the perf_event_open syscall number is set; exit_reasons is left
    untouched.
    """
    globals()['sc_perf_evt_open'] = 331

def ppc_init():
    """Configure the module globals for a ppc host.

    Sets the perf_event_open syscall number and replaces ioctl_numbers
    with the ppc request codes.  The table must carry every key the Event
    class looks up ('SET_FILTER', 'ENABLE', 'DISABLE' and 'RESET');
    without 'RESET', Event.reset() would fail with KeyError on ppc.
    exit_reasons is left untouched.
    """
    globals().update({
        'sc_perf_evt_open' : 319,
        'ioctl_numbers' : {
            # The size of the filter-string pointer is encoded in bits
            # 16 and up of the request code.
            'SET_FILTER' : 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) << 16),
            'ENABLE'     : 0x20002400,
            'DISABLE'    : 0x20002401,
            # Same encoding as ENABLE/DISABLE, with the command number (3)
            # taken from the default table's RESET entry.
            'RESET'      : 0x20002403,
        }
    })

def aarch64_init():
    """Configure the module globals for an aarch64 host.

    Sets the perf_event_open syscall number and installs
    aarch64_exit_reasons as the exit-reason table.
    """
    settings = globals()
    settings['sc_perf_evt_open'] = 241
    settings['exit_reasons'] = aarch64_exit_reasons

def detect_platform():
    """Detect the host architecture and run the matching *_init() helper.

    ppc and aarch64 are recognised from the machine name reported by
    os.uname(); x86 (vmx/svm) and s390 are recognised from /proc/cpuinfo.
    When nothing matches, the module globals keep their defaults.
    """
    machine = os.uname()[4]
    if machine.startswith('ppc'):
        ppc_init()
        return
    if machine.startswith('aarch64'):
        aarch64_init()
        return

    # The with-block closes /proc/cpuinfo on every exit path, including the
    # early returns below.
    with open('/proc/cpuinfo') as cpuinfo:
        for line in cpuinfo:
            if line.startswith('flags'):
                for flag in line.split():
                    if flag in x86_exit_reasons:
                        x86_init(flag)
                        return
            elif line.startswith('vendor_id'):
                if 'IBM/S390' in line.split():
                    s390_init()
                    return

detect_platform()

def invert(d):
    """Return a new dict mapping each value of d back to its key.

    Values must be hashable.  If several keys share a value, the pair
    iterated last wins.  items() is used instead of iteritems() so that any
    mapping works, on Python 2 and Python 3 alike.
    """
    return dict((value, key) for key, value in d.items())

# Tracepoints that are additionally split per value of one of their fields:
#   tracepoint name -> (field name, {value name: numeric value})
# TracepointProvider turns every value name into an extra
# 'tracepoint(NAME)' field with a 'field==value' filter.
filters = {
    'kvm_userspace_exit': ('reason', invert(userspace_exit_reasons)),
}
if exit_reasons:
    # Only platforms whose init helper installed an exit-reason table.
    filters.update(kvm_exit=('exit_reason', invert(exit_reasons)))

import struct, array

# Handles into the C library: 'syscall' issues raw system calls (used by
# _perf_event_open) and 'get_errno' returns a pointer to errno, so callers
# read the value as get_errno()[0].
# NOTE(review): errno is fetched through libc directly; ctypes' own
# use_errno/get_errno mechanism is not used here.
libc = ctypes.cdll.LoadLibrary('libc.so.6')
syscall = libc.syscall
get_errno = libc.__errno_location
get_errno.restype = ctypes.POINTER(ctypes.c_int)

class perf_event_attr(ctypes.Structure):
    """ctypes layout of the attribute block handed to perf_event_open.

    Event fills in type, size, config, sample_type and read_format; the
    remaining fields keep the zero value ctypes initialises them with.
    The layout presumably mirrors the leading part of the kernel's
    struct perf_event_attr -- confirm against linux/perf_event.h before
    adding or reordering fields.
    """
    _fields_ = [
        ('type', ctypes.c_uint32),
        ('size', ctypes.c_uint32),
        ('config', ctypes.c_uint64),
        ('sample_freq', ctypes.c_uint64),
        ('sample_type', ctypes.c_uint64),
        ('read_format', ctypes.c_uint64),
        ('flags', ctypes.c_uint64),
        ('wakeup_events', ctypes.c_uint32),
        ('bp_type', ctypes.c_uint32),
        ('bp_addr', ctypes.c_uint64),
        ('bp_len', ctypes.c_uint64),
    ]
def _perf_event_open(attr, pid, cpu, group_fd, flags):
    """Issue the perf_event_open system call through libc's syscall().

    attr is passed by pointer; pid, cpu and group_fd are converted to C
    ints and flags to a C long.  Returns whatever syscall() returns;
    Event treats -1 as failure and then reads errno.
    """
    attr_ptr = ctypes.pointer(attr)
    c_pid = ctypes.c_int(pid)
    c_cpu = ctypes.c_int(cpu)
    c_group_fd = ctypes.c_int(group_fd)
    c_flags = ctypes.c_long(flags)
    return syscall(sc_perf_evt_open, attr_ptr, c_pid, c_cpu, c_group_fd,
                   c_flags)

# Values for perf_event_attr.type (Event uses PERF_TYPE_TRACEPOINT).
PERF_TYPE_HARDWARE              = 0
PERF_TYPE_SOFTWARE              = 1
PERF_TYPE_TRACEPOINT            = 2
PERF_TYPE_HW_CACHE              = 3
PERF_TYPE_RAW                   = 4
PERF_TYPE_BREAKPOINT            = 5

# Bit flags for perf_event_attr.sample_type, one bit per flag.
PERF_SAMPLE_IP                  = 0x001
PERF_SAMPLE_TID                 = 0x002
PERF_SAMPLE_TIME                = 0x004
PERF_SAMPLE_ADDR                = 0x008
PERF_SAMPLE_READ                = 0x010
PERF_SAMPLE_CALLCHAIN           = 0x020
PERF_SAMPLE_ID                  = 0x040
PERF_SAMPLE_CPU                 = 0x080
PERF_SAMPLE_PERIOD              = 0x100
PERF_SAMPLE_STREAM_ID           = 0x200
PERF_SAMPLE_RAW                 = 0x400

# Bit flags for perf_event_attr.read_format (Event uses PERF_FORMAT_GROUP).
PERF_FORMAT_TOTAL_TIME_ENABLED  = 0x1
PERF_FORMAT_TOTAL_TIME_RUNNING  = 0x2
PERF_FORMAT_ID                  = 0x4
PERF_FORMAT_GROUP               = 0x8

import re

# Root of the tracing directory in debugfs; tracepoint ids are read from
# <sys_tracing>/events/<event_set>/<tracepoint>/id.
sys_tracing = '/sys/kernel/debug/tracing'

class Group(object):
    """The perf events opened on one CPU, read together in a single call.

    The first event added acts as the group leader: later events are opened
    with its descriptor as group fd (see Event), and all counters are read
    through it.
    """

    def __init__(self, cpu):
        self.events = []
        self.group_leader = None
        self.cpu = cpu
        # Set by add_event() once the first event exists.
        self.file = None

    def add_event(self, name, event_set, tracepoint, filter = None):
        """Open a new Event on this group's CPU and append it to the group."""
        self.events.append(Event(group = self,
                                 name = name, event_set = event_set,
                                 tracepoint = tracepoint, filter = filter))
        if len(self.events) == 1:
            # The counters arrive as packed binary data, so the descriptor
            # must be opened in binary mode.
            self.file = os.fdopen(self.events[0].fd, 'rb')

    def read(self):
        """Return a dict mapping event name to its current counter value.

        An empty group yields an empty dict.
        """
        if not self.events:
            return {}
        # One 8-byte header word (skipped via the pad bytes in the format)
        # followed by one 8-byte counter per event.
        length = 8 * (1 + len(self.events))
        fmt = 'xxxxxxxx' + 'q' * len(self.events)
        return dict(zip([event.name for event in self.events],
                        struct.unpack(fmt, self.file.read(length))))

class Event(object):
    """One tracepoint perf event opened on the CPU of its group."""

    def __init__(self, group, name, event_set, tracepoint, filter = None):
        """Open the event and optionally attach a filter to it.

        group      -- Group the event joins; its first event, if any, is
                      used as the perf group leader.
        name       -- field name reported for this event.
        event_set, tracepoint
                   -- locate the tracepoint id below sys_tracing/events.
        filter     -- optional filter string handed to the SET_FILTER
                      ioctl.

        Raises Exception if perf_event_open fails; errors from reading the
        id file or from the filter ioctl propagate unchanged.
        """
        self.name = name
        attr = perf_event_attr()
        attr.type = PERF_TYPE_TRACEPOINT
        attr.size = ctypes.sizeof(attr)
        id_path = os.path.join(sys_tracing, 'events', event_set,
                               tracepoint, 'id')
        # Close the id file right away; one is read per event per CPU.
        with open(id_path) as id_file:
            attr.config = int(id_file.read())
        attr.sample_type = (PERF_SAMPLE_RAW
                            | PERF_SAMPLE_TIME
                            | PERF_SAMPLE_CPU)
        # An earlier revision also assigned attr.sample_period here, but
        # perf_event_attr declares no such field, so the assignment only
        # created a Python attribute and never reached the structure.  It
        # is dropped; the structure passed to the kernel is unchanged.
        attr.read_format = PERF_FORMAT_GROUP
        group_leader = -1
        if group.events:
            group_leader = group.events[0].fd
        fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0)
        if fd == -1:
            err = get_errno()[0]
            raise Exception('perf_event_open failed, errno = ' + str(err))
        if filter:
            import fcntl
            try:
                fcntl.ioctl(fd, ioctl_numbers['SET_FILTER'], filter)
            except Exception:
                # Do not leak the descriptor when the filter is rejected.
                os.close(fd)
                raise
        self.fd = fd

    def _ioctl(self, request):
        """Issue the argument-less perf ioctl named by request on this event."""
        import fcntl
        fcntl.ioctl(self.fd, ioctl_numbers[request], 0)

    def enable(self):
        """Issue the ENABLE ioctl."""
        self._ioctl('ENABLE')

    def disable(self):
        """Issue the DISABLE ioctl."""
        self._ioctl('DISABLE')

    def reset(self):
        """Issue the RESET ioctl."""
        self._ioctl('RESET')

class TracepointProvider(object):
    """Statistics provider backed by the kvm tracepoints, via perf events.

    Besides one field per tracepoint directory, every tracepoint listed in
    the module-level filters table contributes one 'tracepoint(NAME)' field
    per filter value.
    """

    def __init__(self):
        path = os.path.join(sys_tracing, 'events', 'kvm')
        fields = [f
                  for f in os.listdir(path)
                  if os.path.isdir(os.path.join(path, f))]
        extra = []
        for f in fields:
            if f in filters:
                subfield, values = filters[f]
                for name in values:
                    extra.append(f + '(' + name + ')')
        fields += extra
        self._setup(fields)
        self.select(fields)

    def fields(self):
        """Return every field name this provider was set up with."""
        return self._fields

    def _online_cpus(self):
        """Return the numbers of the CPUs to open events on.

        A cpuN directory without an 'online' file is included; one whose
        'online' file does not read '1' is skipped.
        """
        l = []
        pattern = r'cpu([0-9]+)'
        basedir = '/sys/devices/system/cpu'
        for entry in os.listdir(basedir):
            match = re.match(pattern, entry)
            if not match:
                continue
            path = os.path.join(basedir, entry, 'online')
            if os.path.exists(path):
                # Close the file explicitly instead of leaving one open
                # handle per CPU to the garbage collector.
                with open(path) as online:
                    if online.read().strip() != '1':
                        continue
            l.append(int(match.group(1)))
        return l

    def _setup(self, _fields):
        """Open one Group per online CPU holding one event per field."""
        self._fields = _fields
        cpus = self._online_cpus()
        import resource
        # Every field costs one descriptor per CPU, so raise the limit on
        # open files before opening the events.
        nfiles = len(cpus) * 1000
        resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles))
        self.group_leaders = []
        for cpu in cpus:
            group = Group(cpu)
            for name in _fields:
                tracepoint = name
                filter = None
                # 'tracepoint(NAME)' fields become a filtered event on the
                # underlying tracepoint.
                m = re.match(r'(.*)\((.*)\)', name)
                if m:
                    tracepoint, sub = m.groups()
                    filter = '%s==%d\0' % (filters[tracepoint][0],
                                           filters[tracepoint][1][sub])
                group.add_event(name, event_set = 'kvm',
                                tracepoint = tracepoint,
                                filter = filter)
            self.group_leaders.append(group)

    def select(self, fields):
        """Reset and enable the events named in fields; disable the rest."""
        for group in self.group_leaders:
            for event in group.events:
                if event.name in fields:
                    event.reset()
                    event.enable()
                else:
                    event.disable()

    def read(self):
        """Return the per-field counter values summed over all CPUs.

        NOTE(review): events disabled by select() are still part of the
        result, carrying whatever count they held when they were disabled.
        """
        from collections import defaultdict
        ret = defaultdict(int)
        for group in self.group_leaders:
            for name, val in group.read().items():
                ret[name] += val
        return ret

class Stats(object):
    """Collect counter values from a list of providers.

    self.values maps a field name to None (selected, not read yet) or to a
    (value, delta) tuple, where delta is the change since the previous
    get() and is None on the first reading after a (re)selection.
    """

    def __init__(self, providers, fields = None):
        """providers: objects offering fields(), select() and read().
        fields: optional regex; only matching field names are selected.
        """
        self.providers = providers
        self.fields_filter = fields
        self._update()

    def _update(self):
        """Re-select the wanted fields on every provider; reset the values."""
        import re
        def wanted(key):
            if not self.fields_filter:
                return True
            return re.match(self.fields_filter, key) is not None
        self.values = dict()
        # Use the providers this instance was constructed with, not the
        # module-level variable that merely happens to share the name.
        for d in self.providers:
            provider_fields = [key for key in d.fields() if wanted(key)]
            for key in provider_fields:
                self.values[key] = None
            d.select(provider_fields)

    def set_fields_filter(self, fields_filter):
        """Replace the field regex (None selects everything) and re-select."""
        self.fields_filter = fields_filter
        self._update()

    def get(self):
        """Read every provider and return the updated values dict.

        NOTE(review): every name a provider reports through fields() is
        recorded, including names the filter did not select.
        """
        for d in self.providers:
            new = d.read()
            for key in d.fields():
                oldval = self.values.get(key, (0, 0))
                newval = new[key]
                newdelta = None
                if oldval is not None:
                    newdelta = newval - oldval[0]
                self.values[key] = (newval, newdelta)
        return self.values

# Both providers read from debugfs, so bail out early with a hint when the
# expected directories are not there.
if not os.access('/sys/kernel/debug', os.F_OK):
    print('Please enable CONFIG_DEBUG_FS in your kernel')
    sys.exit(1)
if not os.access('/sys/kernel/debug/kvm', os.F_OK):
    print("Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')")
    print("and ensure the kvm modules are loaded")
    sys.exit(1)

# Column widths, in characters, used by tui(): the event name column and
# the running-total column.
label_width = 40
number_width = 10

def tui(screen, stats):
    """Run the interactive, top-like display until the user quits.

    screen is the curses window supplied by curses.wrapper(); stats is the
    Stats object to poll.  'x' toggles the per-reason drilldown fields,
    'q' or Ctrl-C leaves.
    """
    curses.use_default_colors()
    curses.noecho()
    drilldown = False
    fields_filter = stats.fields_filter

    def update_drilldown():
        # A filter given by the user always wins; drilldown only switches
        # between "everything" and "no 'name(reason)' fields" without one.
        if fields_filter:
            return
        if drilldown:
            stats.set_fields_filter(None)
        else:
            stats.set_fields_filter(r'^[^\(]*$')

    update_drilldown()

    def refresh(sleeptime):
        total_col = 1 + label_width
        current_col = total_col + number_width
        screen.erase()
        screen.addstr(0, 0, 'kvm statistics')
        screen.addstr(2, 1, 'Event')
        # Both headings are right-aligned with the numbers below them.
        screen.addstr(2, current_col - len('Total'), 'Total')
        screen.addstr(2, current_col + 8 - len('Current'), 'Current')
        s = stats.get()

        def sortkey(x):
            # Largest delta first, ties broken by the largest total; a
            # missing or zero delta sorts after every positive one.
            total, delta = s[x]
            return (-delta if delta else 0, -total)

        row = 3
        for key in sorted(s.keys(), key = sortkey):
            if row >= screen.getmaxyx()[0]:
                break
            total, delta = s[key]
            if not total and not delta:
                # The first all-zero entry ends the listing.
                break
            screen.addstr(row, 1, key)
            screen.addstr(row, total_col, '%10d' % (total,))
            if delta is not None:
                screen.addstr(row, current_col, '%8d' % (delta / sleeptime,))
            row += 1
        screen.refresh()

    # A short first interval gets real deltas on screen quickly; every
    # later interval is three seconds.
    sleeptime = 0.25
    while True:
        refresh(sleeptime)
        curses.halfdelay(int(sleeptime * 10))
        sleeptime = 3
        try:
            pressed = screen.getkey()
            if pressed == 'x':
                drilldown = not drilldown
                update_drilldown()
            if pressed == 'q':
                break
        except KeyboardInterrupt:
            break
        except curses.error:
            # No key within the halfdelay timeout: just redraw.
            continue

def batch(stats):
    """Print the total and the one-second delta of every field, then return."""
    # The first reading only establishes the baseline for the deltas.
    stats.get()
    time.sleep(1)
    snapshot = stats.get()
    for key in sorted(snapshot):
        total, delta = snapshot[key]
        print('%-22s%10d%10d' % (key, total, delta))

def log(stats):
    """Print one line of per-second deltas every second, vmstat style.

    The set of columns is fixed by the first reading; the column banner is
    repeated every 20 lines.  Runs until interrupted.
    """
    keys = sorted(stats.get())

    def emit(cells):
        # One space between cells and a newline at the end.
        sys.stdout.write(' '.join(cells) + '\n')

    def banner():
        emit(['%10s' % k[0:9] for k in keys])

    def statline():
        s = stats.get()
        emit([' %9d' % s[k][1] for k in keys])

    banner_repeat = 20
    line = 0
    while True:
        time.sleep(1)
        if line % banner_repeat == 0:
            banner()
        statline()
        line += 1

# Command line handling.  'options' first names the parser and is rebound
# to the parsed values by parse_args() below.
options = optparse.OptionParser()
# Output mode: batch (-1) or log (-l); the curses display is used when
# neither is given.
options.add_option('-1', '--once', '--batch',
                   action = 'store_true',
                   default = False,
                   dest = 'once',
                   help = 'run in batch mode for one second',
                   )
options.add_option('-l', '--log',
                   action = 'store_true',
                   default = False,
                   dest = 'log',
                   help = 'run in logging mode (like vmstat)',
                   )
# Data source: both may be given; with neither, tracepoints are tried first
# and debugfs is the fallback.
options.add_option('-t', '--tracepoints',
                   action = 'store_true',
                   default = False,
                   dest = 'tracepoints',
                   help = 'retrieve statistics from tracepoints',
                   )
options.add_option('-d', '--debugfs',
                   action = 'store_true',
                   default = False,
                   dest = 'debugfs',
                   help = 'retrieve statistics from debugfs',
                   )
# Regex handed to Stats as the field filter.
options.add_option('-f', '--fields',
                   action = 'store',
                   default = None,
                   dest = 'fields',
                   help = 'fields to display (regex)',
                   )
# The whole of sys.argv is parsed, so args[0] is the program name; args is
# not used further in this file.
(options, args) = options.parse_args(sys.argv)

# Build the list of statistics providers requested on the command line.
providers = []
if options.tracepoints:
    providers.append(TracepointProvider())
if options.debugfs:
    providers.append(DebugfsProvider())

if not providers:
    # No explicit choice: prefer tracepoints and fall back to debugfs when
    # they cannot be set up.  Only ordinary errors trigger the fallback, so
    # Ctrl-C and sys.exit() are no longer swallowed.
    try:
        providers = [TracepointProvider()]
    except Exception:
        providers = [DebugfsProvider()]

stats = Stats(providers, fields = options.fields)

# Pick the output mode: logging, the interactive display (default) or a
# single batch report.
if options.log:
    log(stats)
elif not options.once:
    # curses.wrapper is available after a plain 'import curses'; the extra
    # 'import curses.wrapper' is not needed for this call and fails where
    # wrapper is not a submodule.
    curses.wrapper(tui, stats)
else:
    batch(stats)
